<!doctype html>
<html lang="zh-CN">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <title>
   《面向程序员的数据挖掘指南》——（七）朴素贝叶斯和文本数据  | 数螺 | NAUT IDEA
  </title>
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap.min.css" rel="stylesheet"/>
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap-theme.min.css" rel="stylesheet"/>
  <style type="text/css">
   /* ---------------------------------------------------------------
    * Article body (#xmain)
    * ------------------------------------------------------------- */

   /* Images stay within the column width and sit on their own line. */
   #xmain img {
     display: block;
     max-width: 100%;
     margin-top: 10px;
     margin-bottom: 10px;
   }

   /* Body copy: 16px text with 150% line height and space above each paragraph. */
   #xmain p {
     font-size: 16px;
     line-height: 150%;
     margin-top: 20px;
   }

   /* Heading sizes inside the article, stepping down from h2 to h4. */
   #xmain h2 {
     font-size: 24px;
   }

   #xmain h3 {
     font-size: 20px;
   }

   #xmain h4 {
     font-size: 18px;
   }

   /* ---------------------------------------------------------------
    * Site banner (.header)
    * ------------------------------------------------------------- */

   /* Blue bar with white text, separated from the content below it. */
   .header {
     color: #ffffff;
     background-color: #0099ff;
     margin-bottom: 20px;
   }

   /* Banner text is laid out inline and vertically centred on its line. */
   .header p {
     display: inline-block;
     vertical-align: middle;
     margin: 0;
     padding: 10px 0;
     font-size: 16px;
   }

   /* Links in the banner are white to match the banner text. */
   .header a {
     color: white;
   }

   /* Fixed logo height inside the banner. */
   .header img {
     height: 25px;
   }
  </style>
  <script src="http://cdn.bootcss.com/jquery/3.0.0/jquery.min.js">
  </script>
  <script src="http://nautstatic-10007657.file.myqcloud.com/static/css/readability.min.js" type="text/javascript">
  </script>
  <script type="text/javascript">
   // Once the DOM is ready, run Readability over a clone of this page and
   // replace the contents of #xmain with the extracted article markup.
   $(document).ready(function() {
     // Location parts of the original article, handed to Readability as its
     // first constructor argument.
     // NOTE(review): presumably used to resolve relative URLs in the article —
     // confirm against the bundled readability.min.js.
     var uri = {
       spec: "http://dataunion.org/8895.html",
       host: "http://dataunion.org",
       prePath: "http://dataunion.org",
       scheme: "http",
       pathBase: "http://dataunion.org/"
     };

     // Parse a clone so Readability works on the copy, not the live document.
     var documentClone = document.cloneNode(true);
     var article = new Readability(uri, documentClone).parse();

     // If no article could be extracted (parse() yields nothing) or the
     // container is missing, leave the page as it is instead of throwing.
     var container = document.getElementById("xmain");
     if (container && article && article.content) {
       container.innerHTML = article.content;
     }
   });
  </script>
  <!-- 1466457529: Accept with keywords: (title(0.375):指南,社区,数盟,程序员,面向,数据挖掘,文本,数据, topn(0.4):指南,概率,数盟,分类器,程序员,数据挖掘,测试集,酸奶,目录,数据分析,效果,结果,元素,电影,面向,计算,文章,评价,数据,新闻组,公式,词表,常用词,次数,算法,新闻,分类,列表,单词,文本).-->
 </head>
 <body>
  <div class="header">
   <div class="container">
    <div class="row">
     <div class="col-xs-6 col-sm-6 text-left">
      <a href="/databee">
       <img src="http://nautidea-10007657.cos.myqcloud.com/logo_white.png" alt="数螺"/>
      </a>
      <a href="/databee">
       <p>
        数螺
       </p>
      </a>
     </div>
     <div class="hidden-xs col-sm-6 text-right">
      <p>
       致力于数据科学的推广和知识传播
      </p>
     </div>
    </div>
   </div>
  </div>
  <div class="container text-center">
   <h1>
    《面向程序员的数据挖掘指南》——（七）朴素贝叶斯和文本数据
   </h1>
  </div>
  <div class="container" id="xmain">
   ﻿﻿
   <title>
    《面向程序员的数据挖掘指南》——（七）朴素贝叶斯和文本数据 | 数盟社区
   </title>
   <!-- All in One SEO Pack 2.2.7.6.2 by Michael Torbert of Semper Fi Web Design[32,76] -->
   <!-- /all in one seo pack -->
   <!--
<div align="center">
<a href="http://strata.oreilly.com.cn/hadoop-big-data-cn?cmp=mp-data-confreg-home-stcn16_dataunion_pc" target="_blank"><img src="http://dataunion.org/wp-content/uploads/2016/05/stratabj.jpg"/ ></a>
</div>
-->
   <header id="header-web">
    <div class="header-main">
     <hgroup class="logo">
      <h1>
       <a href="http://dataunion.org/" rel="home" title="数盟社区">
        <img src="http://dataunion.org/wp-content/themes/yzipi/images/logo.png"/>
       </a>
      </h1>
     </hgroup>
     <!--logo-->
     <nav class="header-nav">
      <ul class="menu" id="menu-%e4%b8%bb%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-71" id="menu-item-71">
        <a href="http://dataunion.org/category/events" title="events">
         活动
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22457" id="menu-item-22457">
          <a href="http://dataunion.org/2016timeline">
           2016档期
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22459" id="menu-item-22459">
          <a href="http://dataunion.org/category/parterc">
           合作会议
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-post-ancestor menu-item-has-children menu-item-20869" id="menu-item-20869">
        <a href="http://dataunion.org/category/tech" title="articles">
         文章
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20867" id="menu-item-20867">
          <a href="http://dataunion.org/category/tech/base" title="base">
           基础架构
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3302" id="menu-item-3302">
          <a href="http://dataunion.org/category/tech/ai" title="ai">
           人工智能
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-post-ancestor current-menu-parent current-post-parent menu-item-3303" id="menu-item-3303">
          <a href="http://dataunion.org/category/tech/analysis" title="analysis">
           数据分析
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21920" id="menu-item-21920">
          <a href="http://dataunion.org/category/tech/dm">
           数据挖掘
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3314" id="menu-item-3314">
          <a href="http://dataunion.org/category/tech/viz" title="viz">
           可视化
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3305" id="menu-item-3305">
          <a href="http://dataunion.org/category/tech/devl" title="devl">
           编程语言
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-20876" id="menu-item-20876">
        <a href="http://dataunion.org/category/industry">
         行业
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-16328" id="menu-item-16328">
          <a href="http://dataunion.org/category/industry/case" title="case">
           行业应用
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-2112" id="menu-item-2112">
          <a href="http://dataunion.org/category/industry/demo" title="demo">
           Demo展示
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21562" id="menu-item-21562">
          <a href="http://dataunion.org/category/industry/news">
           行业资讯
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-311" id="menu-item-311">
        <a href="http://dataunion.org/category/sources" title="sources">
         资源
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20870" id="menu-item-20870">
        <a href="http://dataunion.org/category/books" title="book">
         图书
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21363" id="menu-item-21363">
        <a href="http://dataunion.org/category/training">
         课程
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-21853" id="menu-item-21853">
        <a href="http://dataunion.org/category/jobs">
         职位
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22050" id="menu-item-22050">
          <a href="http://dataunion.org/category/career">
           职业规划
          </a>
         </li>
        </ul>
       </li>
      </ul>
     </nav>
     <!--header-nav-->
    </div>
   </header>
   <!--header-web-->
   <div id="main">
    <div id="soutab">
     <form action="http://dataunion.org/" class="search" method="get">
     </form>
    </div>
    <div id="container">
     <nav id="mbx">
      当前位置：
      <a href="http://dataunion.org">
       首页
      </a>
      &gt;
      <a href="http://dataunion.org/category/tech">
       文章
      </a>
      &gt;
      <a href="http://dataunion.org/category/tech/analysis">
       数据分析
      </a>
      &gt;  正文
     </nav>
     <!--mbx-->
     <article class="content">
      <header class="contenttitle">
       <div class="mscc">
        <h1 class="mscctitle">
         <a href="http://dataunion.org/8895.html">
          《面向程序员的数据挖掘指南》——（七）朴素贝叶斯和文本数据
         </a>
        </h1>
        <address class="msccaddress ">
         <em>
          3,362 次阅读 -
         </em>
         <a href="http://dataunion.org/category/tech/analysis" rel="category tag">
          数据分析
         </a>
        </address>
       </div>
      </header>
      <div class="content-text">
       <h2>
        <span style="font-size: 10pt;">
         译者：
         <span class="author">
          <a data-original-title="" href="https://github.com/jizhang" rel="author" title="">
           jizhang
          </a>
         </span>
        </span>
       </h2>
       <blockquote>
        <p>
         <a data-original-title="" href="http://dataunion.org/?p=7965" title="">
          《面向程序员的数据挖掘指南》——（一）简介：数据挖掘是什么
         </a>
        </p>
        <p>
         <a data-original-title="" href="http://dataunion.org/?p=7989" title="">
          《面向程序员的数据挖掘指南》——（二）推荐系统入门
         </a>
        </p>
        <p>
         <a data-original-title="" href="http://dataunion.org/?p=8082" title="">
          《面向程序员的数据挖掘指南》——（三）隐式评价和基于物品的过滤算法
         </a>
        </p>
        <p>
         <a data-original-title="" href="http://dataunion.org/?p=8137" title="">
          《面向程序员的数据挖掘指南》——（四）分类
         </a>
        </p>
        <p>
         <a data-original-title="" href="http://dataunion.org/?p=8591" title="">
          《面向程序员的数据挖掘指南》——（五）进一步探索分类
         </a>
        </p>
        <p>
         <a href="http://dataunion.org/?p=8660">
          《面向程序员的数据挖掘指南》——（六）朴素贝叶斯和概率
         </a>
        </p>
        <p>
         <a href="http://dataunion.org/?p=8895">
          《面向程序员的数据挖掘指南》——（七）朴素贝叶斯和文本数据
         </a>
        </p>
        <p>
         <a href="http://dataunion.org/?p=9401">
          《面向程序员的数据挖掘指南》——（八）聚类
         </a>
        </p>
       </blockquote>
       <h2>
        非结构化文本的分类算法
       </h2>
       <p>
        在前几个章节中，我们学习了如何使用人们对物品的评价（五星、顶和踩）来进行推荐；还使用了他们的隐式评价——买过什么，点击过什么；我们利用特征来进行分类，如身高、体重、对法案的投票等。这些数据有一个共性——能用表格来展现：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-1.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-1.png"/>
        </a>
       </p>
       <p>
        因此这类数据我们称为“结构化数据”——数据集中的每条数据（上表中的一行）由多个特征进行描述（上表中的列）。而非结构化的数据指的是诸如电子邮件文本、推特信息、博客、新闻等。这些数据至少第一眼看起来是无法用一张表格来展现的。
       </p>
       <p>
        举个例子，我们想从推特信息中获取用户对各种电影的评价：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-2.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-2.png"/>
        </a>
       </p>
       <p>
        可以看到，Andy Gavin喜欢看地心引力，因为他的消息中有“不寒而栗”、“演的太棒了”之类的文本。而Debra Murphy则不太喜欢这部电影，因为她说“还是省下看这部电影的钱吧”。如果有人说“我太想看这部电影了，都兴奋坏了！”，我们可以看出她是喜欢这部电影的，即使信息中有“坏”这个字。
       </p>
       <p>
        我在逛超市时看到一种叫Chobani的酸奶，名字挺有趣的，但真的好吃吗？于是我掏出iPhone，谷歌了一把，看到一篇名为“女人不能只吃面包”的博客：
       </p>
       <blockquote>
        <p>
         <strong>
          无糖酸奶品评
         </strong>
        </p>
        <p>
         你喝过Chobani酸奶吗？如果没有，就赶紧拿起钥匙出门去买吧！虽然它是脱脂原味的，但喝起来和酸奶的口感很像，致使我每次喝都有负罪感，因为这分明就是在喝全脂酸奶啊！原味的感觉很酸很够味，你也可以尝试一下蜂蜜口味的。我承认，虽然我在减肥期间不该吃蜂蜜的，但如果我有一天心情很糟想吃甜食，我就会在原味酸奶里舀一勺蜂蜜，太值得了！至于那些水果味的，应该都有糖分在里面，但其实酸奶本身就已经很美味了，水果只是点缀。如果你家附近没有Chobani，也可以试试Fage，同样好吃。
        </p>
        <p>
         虽然需要花上一美元不到，而且还会增加20卡路里，但还是很值得的，毕竟我已经一下午没吃东西了！
        </p>
        <p>
         <em>
          <a href="http://womandoesnotliveonbreadalone.blogspot.com/2009/03/sugar-free-yogurt-reviews.html">
           http://womandoesnotliveonbreadalone.blogspot.com/2009/03/sugar-free-yogurt-reviews.html
          </a>
         </em>
        </p>
       </blockquote>
       <p>
        这是一篇正面评价吗？从第二句就可以看出，作者非常鼓励我去买。她还用了“够味”、“美味”等词汇，这些都是正面的评价。所以，让我先去吃会儿……
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-3.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-3.png"/>
        </a>
       </p>
       <h3>
        <a class="anchor" href="https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md#%E8%87%AA%E5%8A%A8%E5%88%A4%E5%88%AB%E6%96%87%E6%9C%AC%E4%B8%AD%E7%9A%84%E6%84%9F%E6%83%85%E8%89%B2%E5%BD%A9" id="user-content-自动判别文本中的感情色彩">
        </a>
        自动判别文本中的感情色彩
       </h3>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-4.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-4.png"/>
        </a>
       </p>
       <blockquote>
        <p>
         约翰，这条推文应该是称赞地心引力的！
        </p>
       </blockquote>
       <p>
        假设我们要构建一个自动判别文本感情色彩的系统，它有什么作用呢？比如说有家公司是售卖健康检测设备的，他们想要知道人们对这款产品的反响如何。他们投放了很多广告，顾客是喜欢（我好想买一台）还是讨厌（看起来很糟糕）呢？再比如苹果公司召开了一次新闻发布会，讨论iPhone现有的问题，结果是正面的还是负面的呢？一位参议会议员对某个法案做了一次公开演讲，那些政治评论家的反应如何？看来这个系统还是有些作用的。
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-5.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-5.png"/>
        </a>
       </p>
       <blockquote>
        <p>
         那要怎样构建一套这样的系统呢？
        </p>
       </blockquote>
       <p>
        假设我要从文本中区分顾客对某些食品的喜好，可能就会列出一些表达喜欢的词语，以及表达厌恶的词：
       </p>
       <ul class="task-list">
        <li>
         表达喜欢的词：美味、好吃、不错、喜欢、可口
        </li>
        <li>
         表达厌恶的词：糟糕、难吃、不好、讨厌、恶心
        </li>
       </ul>
       <p>
        比如我们想知道某篇评论对Chobani酸奶的评价是正面的还是负面的，我们可以去统计评论中表达喜欢和厌恶的词的数量，看哪种类型出现的频率高。这种方法也可以应用到其他分类中，比如判断某个人是否支持堕胎，如果他的言论中经常出现“未出生的小孩”，那他很可能是反堕胎的；如果言论中出现“胎儿”这个词比较多，那有可能是支持堕胎的。其实，用词语出现的数量来进行分类还是很容易想到的。
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-6.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-6.png"/>
        </a>
       </p>
       <p>
        我们可以使用朴素贝叶斯算法来进行分类，而不是一般的计数。先来回忆一下公式：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-7.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-7.png"/>
        </a>
       </p>
       <p>
        argmax表示选取概率最大的分类；h∈H表示计算每个事件的概率；P(D|h)表示在给定h的条件下，D发生的概率（如给定某类文章，这类文章中特定单词出现的概率）；P(h)则指事件h发生的概率。
       </p>
       <p>
        我们的训练集是一组文本，又称为
        <strong>
         语料库
        </strong>
        。每个文本（即每条记录）是一则140字左右的推文，并被标记为喜欢和讨厌两类。P(h)表示的就是喜欢和讨厌出现的概率。我们的训练集中有1000条记录，喜欢和讨厌各有500条，因此它们的概率是：
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db5b3609584049666" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          P(喜欢) = 0.5
P(讨厌) = 0.5
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685db5b3609584049666-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3609584049666-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685db5b3609584049666-3">
               3
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685db5b3609584049666-1">
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               喜欢
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                0.5
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3609584049666-2">
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               讨厌
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                0.5
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db5b3609584049666-3">
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0009 seconds] -->
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-8.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-8.png"/>
        </a>
       </p>
       <blockquote>
        <p>
         当我们使用已经标记好分类的数据集进行训练时，这种类型的机器学习称为“监督式学习”。文本分类就是监督式学习的一种。
        </p>
        <p>
         如果训练集没有标好分类，那就称为“非监督式学习”，聚类就是一种非监督式学习，我们将在下一章讲解。
        </p>
        <p>
         还有一些算法结合了监督式和非监督式，通常是在初始化阶段使用分类好的数据，之后再使用未分类的数据进行学习。
        </p>
       </blockquote>
       <p>
        让我们回到上面的公式，首先来看P(D|h)要如何计算——在正面评价中，单词D出现的概率。比如说“Puts the Thrill back in Thriller”这句话，我们可以统计所有表达“喜欢”的文章中第一个单词是“Puts”的概率，第二个单词是“the”的概率，以此类推。接着我们再计算表达“讨厌”的文章中第一个单词是“Puts”的概率，第二个单词是“the”的概率等等。
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-9.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-9.png"/>
        </a>
       </p>
       <blockquote>
        <p>
         谷歌曾统计过英语中大约有一百万的词汇，如果一条推文中有14个单词，那我们就需要计算1,000,000
         <sup>
          14
         </sup>
         个概率了，显然是不现实的。
        </p>
       </blockquote>
       <p>
        的确，这种方法并不可行。我们可以简化一下，不考虑文本中单词的顺序，仅统计表达“喜欢”的文章中某个单词出现的概率。以下是统计方法。
       </p>
       <h2>
        <a class="anchor" href="https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md#%E8%AE%AD%E7%BB%83%E9%98%B6%E6%AE%B5" id="user-content-训练阶段">
        </a>
        训练阶段
       </h2>
       <p>
        首先，我们统计所有文本中一共出现了多少个不同的单词，记作“|Vocabulary|”（总词汇表）。对于每个单词w
        <sub>
         k
        </sub>
        ，我们将计算P(w
        <sub>
         k
        </sub>
        |h
        <sub>
         i
        </sub>
        )，每个h
        <sub>
         i
        </sub>
        （喜欢和讨厌两种）的计算步骤如下：
       </p>
       <ol class="task-list">
        <li>
         将该分类下的所有文章合并到一起；
        </li>
        <li>
         统计合并后的文本中单词出现的总次数，记为n；
        </li>
        <li>
         对于总词汇表中的单词w
         <sub>
          k
         </sub>
         ，统计他们在本类文章中出现的次数n
         <sub>
          k
         </sub>
         ；
        </li>
        <li>
         最后应用下方的公式：
        </li>
       </ol>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-10.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-10.png"/>
        </a>
       </p>
       <h2>
        <a class="anchor" href="https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md#%E4%BD%BF%E7%94%A8%E6%9C%B4%E7%B4%A0%E8%B4%9D%E5%8F%B6%E6%96%AF%E8%BF%9B%E8%A1%8C%E5%88%86%E7%B1%BB" id="user-content-使用朴素贝叶斯进行分类">
        </a>
        使用朴素贝叶斯进行分类
       </h2>
       <p>
        分类阶段比较简单，直接应用贝叶斯公式就可以了，让我们试试吧！
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-11.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-11.png"/>
        </a>
       </p>
       <p>
        通过训练，我们得到以下概率结果：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-12.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-12.png"/>
        </a>
       </p>
       <p>
        比如下面这句话，要如何判断它是正面还是负面的呢？
       </p>
       <blockquote>
        <p>
         I am stunned by the hype over gravity.
        </p>
       </blockquote>
       <p>
        我们需要计算的是下面两个概率，并选取较高的结果：
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db5b361c122859562" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          P(like)×P(I|like)×P(am|like)×P(stunned|like)×...
P(dislike)×P(I|dislike)×P(am|dislike)×P(stunned|dislike)×...
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685db5b361c122859562-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b361c122859562-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685db5b361c122859562-3">
               3
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685db5b361c122859562-1">
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                like
               </span>
               <span class="crayon-sy">
                )
               </span>
               ×
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                I
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                like
               </span>
               <span class="crayon-sy">
                )
               </span>
               ×
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                am
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                like
               </span>
               <span class="crayon-sy">
                )
               </span>
               ×
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                stunned
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                like
               </span>
               <span class="crayon-sy">
                )
               </span>
               ×
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-sy">
                .
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685db5b361c122859562-2">
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                dislike
               </span>
               <span class="crayon-sy">
                )
               </span>
               ×
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                I
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                dislike
               </span>
               <span class="crayon-sy">
                )
               </span>
               ×
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                am
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                dislike
               </span>
               <span class="crayon-sy">
                )
               </span>
               ×
               <span class="crayon-e">
                P
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                stunned
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                dislike
               </span>
               <span class="crayon-sy">
                )
               </span>
               ×
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-sy">
                .
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685db5b361c122859562-3">
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0022 seconds] -->
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-13.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-13.png"/>
        </a>
       </p>
       <p>
        因此分类的结果是“讨厌”。
       </p>
       <p>
        <strong>
         提示
        </strong>
        结果中的6.22E-22是科学计数法，等价于6.22×10
        <sup>
         -22
        </sup>
        。
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-14.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-14.png"/>
        </a>
       </p>
       <blockquote>
        <p>
         哇，这个概率也太小了吧！
        </p>
        <p>
         是的，如果文本中有100个单词，那乘出来的概率就会更小。
        </p>
        <p>
         但是Python不能处理那么小的小数，最后都会变成零的。
        </p>
        <p>
         没错，因此我们要用对数来算——将每个概率的对数相加！
        </p>
       </blockquote>
       <p>
        假设一个包含100个单词的文本中，每个单词的概率都是0.0001，那么计算结果是：
       </p>
       <div class="highlight highlight-python">
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db5b3624797330611" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
           &gt;&gt;&gt; 0.0001 ** 100
0.0
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db5b3624797330611-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3624797330611-2">
                2
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db5b3624797330611-1">
                <span class="crayon-o">
                 &gt;&gt;&gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0.0001
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 **
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 100
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3624797330611-2">
                <span class="crayon-cn">
                 0.0
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0116 seconds] -->
       </div>
       <p>
        如果我们用对数相加来运算的话：
       </p>
       <div class="highlight highlight-python">
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db5b362a420042585" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
           &gt;&gt;&gt; import math
&gt;&gt;&gt; p = 0
&gt;&gt;&gt; for i in range(100):
...     p += math.log(0.0001)
... 
&gt;&gt;&gt; p
-921.034037197617
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db5b362a420042585-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b362a420042585-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b362a420042585-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b362a420042585-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b362a420042585-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b362a420042585-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b362a420042585-7">
                7
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db5b362a420042585-1">
                <span class="crayon-o">
                 &gt;&gt;&gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 import
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 math
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b362a420042585-2">
                <span class="crayon-o">
                 &gt;&gt;&gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 p
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b362a420042585-3">
                <span class="crayon-o">
                 &gt;&gt;&gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 i
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 range
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-cn">
                 100
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b362a420042585-4">
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 p
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 math
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 log
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-cn">
                 0.0001
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b362a420042585-5">
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-h">
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b362a420042585-6">
                <span class="crayon-o">
                 &gt;&gt;&gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 p
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b362a420042585-7">
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 -
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-c1"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-cn">
                 921.034037197617
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0341 seconds] -->
       </div>
       <p>
        <strong>
         提示
        </strong>
       </p>
       <ul class="task-list">
        <li>
         b
         <sup>
          n
         </sup>
         = x 可以转换为 log
         <sub>
          b
         </sub>
         x = n
        </li>
        <li>
         log
         <sub>
          10
         </sub>
         (ab) = log
         <sub>
          10
         </sub>
         (a) + log
         <sub>
          10
         </sub>
         (b)
        </li>
       </ul>
       <h2>
        <a class="anchor" href="https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md#%E6%96%B0%E9%97%BB%E7%BB%84%E8%AF%AD%E6%96%99%E5%BA%93" id="user-content-新闻组语料库">
        </a>
        新闻组语料库
       </h2>
       <p>
        我们下面要处理的数据集是新闻，这些新闻可以分为不同的新闻组，我们会构造一个分类器来判断某则新闻是属于哪个新闻组的：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-15.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-15.png"/>
        </a>
       </p>
       <p>
        比如下面这则新闻是属于rec.motorcycles组的：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-16.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-16.png"/>
        </a>
       </p>
       <p>
        注意到这则新闻中还有一些拼写错误（如accesories、ussually等），这对分类器是一个不小的挑战。
       </p>
       <p>
        这些数据集都来自
        <a href="http://qwone.com/~jason/20Newsgroups/">
         http://qwone.com/~jason/20Newsgroups/
        </a>
        （我们使用的是20news-bydate数据集），你也可以从
        <a href="http://guidetodatamining.com/guide/data/20news-bydate.zip">
         这里
        </a>
        获得。这个数据集包含18,846个文档，并将训练集（60%）和测试集放在了不同的目录中，每个子目录都是一个新闻组，目录中的文件即新闻文本。
       </p>
       <h3>
        <a class="anchor" href="https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md#%E6%8A%8A%E4%B8%8D%E8%A6%81%E7%9A%84%E4%B8%9C%E8%A5%BF%E4%B8%A2%E6%8E%89" id="user-content-把不要的东西丢掉">
        </a>
        把不要的东西丢掉！
       </h3>
       <p>
        比如我们要对下面这篇新闻做分类：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-17.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-17.png"/>
        </a>
       </p>
       <p>
        让我们看看哪些单词是比较重要的：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-18.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-18.png"/>
        </a>
       </p>
       <p>
        （helpful – 重要，not helpful – 不重要）
       </p>
       <p>
        如果我们将英语中最常用的200个单词剔除掉，这篇新闻就成了这样：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-19.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-19.png"/>
        </a>
       </p>
       <p>
        去除掉这些单词后，新闻就只剩下一半大小了。而且，这些单词看上去并不会对分类结果产生影响。H.P. Luhn在他的论文中说“这些组成语法结构的单词是没有意义的，反而会产生很多噪音”。也就是说，将这些“噪音”单词去除后是会提升分类正确率的。我们将这些单词称为“停词”，有专门的停词表可供使用。去除这些词的理由是：
       </p>
       <ol class="task-list">
        <li>
         能够减少需要处理的数据量；
        </li>
        <li>
         这些词的存在会对分类效果产生负面影响。
        </li>
       </ol>
       <h3>
        <a class="anchor" href="https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md#%E5%B8%B8%E7%94%A8%E8%AF%8D%E5%92%8C%E5%81%9C%E8%AF%8D" id="user-content-常用词和停词">
        </a>
        常用词和停词
       </h3>
       <p>
        虽然像the、a这种单词的确没有意义，但有些常用词如work、write、school等在某些场合下还是有作用的，如果将它们也列进停词表里可能会有问题。
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-20.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-20.png"/>
        </a>
       </p>
       <blockquote>
        <p>
         年轻人，那些常用词是不能随便丢弃的！
        </p>
       </blockquote>
       <p>
        因此在定制停词表时还是需要做些考虑的。比如要判别阿拉伯语文档是在哪个地区书写的，可以只看文章中最常出现的词（和上面的方式相反）。如果你有兴趣，可以到我的
        <a href="http://zacharski.org/">
         个人网站
        </a>
        上看看这篇论文。而在分析聊天记录时，性侵者会使用更多I、me、you这样的词汇，如果在分析前将这些单词去除了，效果就会变差。
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-21.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-21.png"/>
        </a>
       </p>
       <blockquote>
        <p>
         不要盲目地使用停词表！
        </p>
       </blockquote>
       <h3>
        <a class="anchor" href="https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md#%E7%BC%96%E5%86%99python%E4%BB%A3%E7%A0%81" id="user-content-编写python代码">
        </a>
        编写Python代码
       </h3>
       <p>
        首先让我们实现朴素贝叶斯分类器的训练部分。训练集的格式是这样的：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-22.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-22.png"/>
        </a>
       </p>
       <p>
        最上层的目录是训练集（20news-bydate-train），其下的子目录代表不同的新闻组（如alt.atheism），子目录中有多个文本文件，即新闻内容。测试集的目录结构也是相同的。因此，分类器的初始化代码要完成以下工作：
       </p>
       <ol class="task-list">
        <li>
         读取停词列表；
        </li>
        <li>
         获取训练集中各目录（分类）的名称；
        </li>
        <li>
         对于各个分类，调用train方法，统计单词出现的次数；
        </li>
        <li>
         计算下面的公式：
        </li>
       </ol>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-23.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-23.png"/>
        </a>
       </p>
       <div class="highlight highlight-python">
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db5b3636018651653" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
from __future__ import print_function
import os, codecs, math

class BayesText:
    """Naive Bayes text classifier trained from a directory of labelled text.

    After construction the instance holds:
      vocabulary  word -> number of occurrences in the whole training set
                  (words occurring fewer than 3 times are removed)
      prob        category -> {word -> smoothed probability of the word}
      totals      category -> number of counted words in that category
      stopwords   word -> 1 for every stop word that is ignored
      categories  list of category (sub-directory) names
    """

    def __init__(self, trainingdir, stopwordlist):
        """Train the classifier.

        trainingdir   training-set directory; every sub-directory is one
                      category and contains that category's text files
        stopwordlist  path of the stop-word file (one word per line)
        """
        self.vocabulary = {}
        self.prob = {}
        self.totals = {}
        self.stopwords = {}
        # "with" closes the file even if reading it raises
        with open(stopwordlist) as f:
            for line in f:
                self.stopwords[line.strip()] = 1
        # keep only the entries that are directories; os.path.join works
        # whether or not trainingdir ends with a path separator
        self.categories = [filename for filename in os.listdir(trainingdir)
                           if os.path.isdir(os.path.join(trainingdir,
                                                         filename))]
        print("Counting ...")
        for category in self.categories:
            print('    ' + category)
            (self.prob[category],
             self.totals[category]) = self.train(trainingdir, category)
        # drop words that occur fewer than 3 times; collect them first
        # because a dict must not change size while it is being iterated
        toDelete = [word for word in self.vocabulary
                    if self.vocabulary[word] < 3]
        for word in toDelete:
            del self.vocabulary[word]
        # turn the per-category counts into probabilities
        vocabLength = len(self.vocabulary)
        print("Computing probabilities:")
        for category in self.categories:
            print('    ' + category)
            denominator = self.totals[category] + vocabLength
            for word in self.vocabulary:
                if word in self.prob[category]:
                    count = self.prob[category][word]
                else:
                    # word never seen in this category: it starts from 1,
                    # so the numerator below becomes 2
                    count = 1
                self.prob[category][word] = (float(count + 1)
                                             / denominator)
        print ("DONE TRAINING\n\n")


    def train(self, trainingdir, category):
        """Count how often each word occurs in one category.

        Reads every file in the category's sub-directory of trainingdir,
        skipping empty tokens and stop words, and also adds each counted
        word to self.vocabulary.

        Returns a tuple (counts, total): counts maps word -> occurrences
        in this category, total is the number of words counted.
        """
        currentdir = os.path.join(trainingdir, category)
        counts = {}
        total = 0
        for file in os.listdir(currentdir):
            # "with" guarantees the file is closed if decoding fails midway
            with codecs.open(os.path.join(currentdir, file),
                             'r', 'iso8859-1') as f:
                for line in f:
                    for token in line.split():
                        # strip punctuation and lower-case the word
                        token = token.strip('\'".,?:-').lower()
                        if token != '' and token not in self.stopwords:
                            self.vocabulary.setdefault(token, 0)
                            self.vocabulary[token] += 1
                            counts.setdefault(token, 0)
                            counts[token] += 1
                            total += 1
        return (counts, total)
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-10">
                10
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-11">
                11
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-12">
                12
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-13">
                13
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-14">
                14
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-15">
                15
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-16">
                16
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-17">
                17
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-18">
                18
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-19">
                19
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-20">
                20
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-21">
                21
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-22">
                22
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-23">
                23
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-24">
                24
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-25">
                25
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-26">
                26
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-27">
                27
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-28">
                28
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-29">
                29
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-30">
                30
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-31">
                31
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-32">
                32
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-33">
                33
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-34">
                34
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-35">
                35
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-36">
                36
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-37">
                37
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-38">
                38
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-39">
                39
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-40">
                40
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-41">
                41
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-42">
                42
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-43">
                43
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-44">
                44
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-45">
                45
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-46">
                46
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-47">
                47
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-48">
                48
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-49">
                49
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-50">
                50
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-51">
                51
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-52">
                52
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-53">
                53
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-54">
                54
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-55">
                55
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-56">
                56
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-57">
                57
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-58">
                58
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-59">
                59
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-60">
                60
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-61">
                61
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-62">
                62
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-63">
                63
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-64">
                64
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-65">
                65
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-66">
                66
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-67">
                67
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-68">
                68
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-69">
                69
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-70">
                70
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-71">
                71
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-72">
                72
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-73">
                73
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3636018651653-74">
                74
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3636018651653-75">
                75
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db5b3636018651653-1">
                <span class="crayon-v">
                 from
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 __future__
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 import
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 print_function
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-2">
                <span class="crayon-v">
                 import
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 os
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 codecs
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 math
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-3">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-4">
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 BayesText
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-5">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-6">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 def
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 __init__
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 trainingdir
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 stopwordlist
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-7">
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 """朴素贝叶斯分类器
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-8">
                <span class="crayon-s">
                 trainingdir 训练集目录，子目录是分类，子目录中包含若干文本
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-9">
                <span class="crayon-s">
                 stopwordlist 停词列表（一行一个）
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-10">
                <span class="crayon-s">
                 """
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-11">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 vocabulary
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 {
                </span>
                <span class="crayon-sy">
                 }
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-12">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 prob
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 {
                </span>
                <span class="crayon-sy">
                 }
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-13">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 totals
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 {
                </span>
                <span class="crayon-sy">
                 }
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-14">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 stopwords
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 {
                </span>
                <span class="crayon-sy">
                 }
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-15">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 f
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 open
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 stopwordlist
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-16">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 line
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 f
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-17">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 stopwords
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 line
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 strip
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-18">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 f
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 close
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-19">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 os
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 listdir
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 trainingdir
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-20">
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # 将不是目录的元素过滤掉
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-21">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 filename
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 filename
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 categories
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-22">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 os
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 path
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 isdir
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 trainingdir
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 filename
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ]
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-23">
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 print
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "Counting ..."
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-24">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-25">
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 print
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 '    '
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-26">
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 prob
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 ,
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-27">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 totals
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 train
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 trainingdir
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-28">
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # 删除出现次数小于3次的单词
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-29">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 toDelete
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-sy">
                 ]
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-30">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-t">
                 word
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 vocabulary
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-31">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 vocabulary
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-t">
                 word
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 3
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-32">
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # 遍历列表时不能删除元素，因此做一个标记
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-33">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 toDelete
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 append
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-t">
                 word
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-34">
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # 删除
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-35">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-t">
                 word
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 toDelete
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-36">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 del
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 vocabulary
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-t">
                 word
                </span>
                <span class="crayon-sy">
                 ]
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-37">
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # 计算概率
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-38">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 vocabLength
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 len
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 vocabulary
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-39">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 print
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "Computing probabilities:"
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-40">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-41">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 print
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 '    '
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-42">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 denominator
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 totals
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 vocabLength
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-43">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-t">
                 word
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 vocabulary
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-44">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-t">
                 word
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 prob
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-45">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 count
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 prob
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-t">
                 word
                </span>
                <span class="crayon-sy">
                 ]
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-46">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 else
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-47">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 count
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-48">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 prob
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-t">
                 word
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-t">
                 float
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 count
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-49">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 denominator
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-50">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 print
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "DONE TRAINING\n\n"
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-51">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-52">
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-53">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-st"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 def
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-en"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 train
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-vpf"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-vpf"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 trainingdir
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-vpf"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-54">
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 """计算分类下各单词出现的次数"""
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-55">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 currentdir
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 trainingdir
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 category
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-56">
                <span class="crayon-e">
                </span>
                <span class="crayon-v">
                 files
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 os
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 listdir
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 currentdir
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-57">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 counts
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 {
                </span>
                <span class="crayon-sy">
                 }
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-58">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-59">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 file
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 files
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-60">
                <span class="crayon-h">
                </span>
                <span class="crayon-c">
                 #print(currentdir + '/' + file)
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-61">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 f
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 codecs
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 open
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 currentdir
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 '/'
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 file
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'r'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'iso8859-1'
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-62">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 line
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 f
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-63">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 tokens
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 line
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 split
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-64">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 tokens
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-65">
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # 删除标点符号，并将单词转换为小写
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-66">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 strip
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 '\'".,?:-'
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-67">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 lower
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-68">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 !=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 ''
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 and
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 not
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 stopwords
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-69">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 vocabulary
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 setdefault
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-70">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 vocabulary
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-71">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 counts
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 setdefault
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-72">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 counts
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-73">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3636018651653-74">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 f
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 close
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3636018651653-75">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 return
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 counts
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.2744 seconds] -->
       </div>
       <p>
        训练结果存储在一个名为prob的字典里，字典的键是分类，值是另一个字典——键是单词，值是概率。
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-24.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-24.png"/>
        </a>
       </p>
       <p>
        god这个词在rec.motorcycles新闻组中出现的概率是0.00013，而在soc.religion.christian新闻组中出现的概率是0.00424。
       </p>
       <p>
        训练阶段的另一个产物是分类列表：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-25.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-25.png"/>
        </a>
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-26.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-26.png"/>
        </a>
       </p>
       <p>
        <strong>
         训练结束了，下面让我们开始进行文本分类吧。
        </strong>
       </p>
       <p>
        请尝试编写一个分类器，达成以下效果：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-27.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-27.png"/>
        </a>
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-28.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-28.png"/>
        </a>
       </p>
       <div class="highlight highlight-python">
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db5b3647819564907" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
           &lt;span class="pl-st"&gt;def&lt;/span&gt; &lt;span class="pl-en"&gt;classify&lt;/span&gt;(&lt;span class="pl-vpf"&gt;self&lt;/span&gt;, &lt;span class="pl-vpf"&gt;filename&lt;/span&gt;):
        results &lt;span class="pl-k"&gt;=&lt;/span&gt; {}
        &lt;span class="pl-k"&gt;for&lt;/span&gt; category &lt;span class="pl-k"&gt;in&lt;/span&gt; &lt;span class="pl-v"&gt;self&lt;/span&gt;.categories:
            results[category] &lt;span class="pl-k"&gt;=&lt;/span&gt; &lt;span class="pl-c1"&gt;0&lt;/span&gt;
        f &lt;span class="pl-k"&gt;=&lt;/span&gt; codecs.open(filename, &lt;span class="pl-s1"&gt;&lt;span class="pl-pds"&gt;'&lt;/span&gt;r&lt;span class="pl-pds"&gt;'&lt;/span&gt;&lt;/span&gt;, &lt;span class="pl-s1"&gt;&lt;span class="pl-pds"&gt;'&lt;/span&gt;iso8859-1&lt;span class="pl-pds"&gt;'&lt;/span&gt;&lt;/span&gt;)
        &lt;span class="pl-k"&gt;for&lt;/span&gt; line &lt;span class="pl-k"&gt;in&lt;/span&gt; f:
            tokens &lt;span class="pl-k"&gt;=&lt;/span&gt; line.split()
            &lt;span class="pl-k"&gt;for&lt;/span&gt; token &lt;span class="pl-k"&gt;in&lt;/span&gt; tokens:
                &lt;span class="pl-c"&gt;#print(token)&lt;/span&gt;
                token &lt;span class="pl-k"&gt;=&lt;/span&gt; token.strip(&lt;span class="pl-s1"&gt;&lt;span class="pl-pds"&gt;'&lt;/span&gt;&lt;span class="pl-cce"&gt;\'&lt;/span&gt;".,?:-&lt;span class="pl-pds"&gt;'&lt;/span&gt;&lt;/span&gt;).lower()
                &lt;span class="pl-k"&gt;if&lt;/span&gt; token &lt;span class="pl-k"&gt;in&lt;/span&gt; &lt;span class="pl-v"&gt;self&lt;/span&gt;.vocabulary:
                    &lt;span class="pl-k"&gt;for&lt;/span&gt; category &lt;span class="pl-k"&gt;in&lt;/span&gt; &lt;span class="pl-v"&gt;self&lt;/span&gt;.categories:
                        &lt;span class="pl-k"&gt;if&lt;/span&gt; &lt;span class="pl-v"&gt;self&lt;/span&gt;.prob[category][token] &lt;span class="pl-k"&gt;==&lt;/span&gt; &lt;span class="pl-c1"&gt;0&lt;/span&gt;:
                            &lt;span class="pl-k"&gt;print&lt;/span&gt;(&lt;span class="pl-s1"&gt;&lt;span class="pl-pds"&gt;"&lt;/span&gt;&lt;span class="pl-c1"&gt;%s&lt;/span&gt; &lt;span class="pl-c1"&gt;%s&lt;/span&gt;&lt;span class="pl-pds"&gt;"&lt;/span&gt;&lt;/span&gt; &lt;span class="pl-k"&gt;%&lt;/span&gt; (category, token))
                        results[category] &lt;span class="pl-k"&gt;+=&lt;/span&gt; math.log(
                            &lt;span class="pl-v"&gt;self&lt;/span&gt;.prob[category][token])
        f.close()
        results &lt;span class="pl-k"&gt;=&lt;/span&gt; &lt;span class="pl-s3"&gt;list&lt;/span&gt;(results.items())
        results.sort(&lt;span class="pl-vpf"&gt;key&lt;/span&gt;&lt;span class="pl-k"&gt;=&lt;/span&gt;&lt;span class="pl-st"&gt;lambda&lt;/span&gt; &lt;span class="pl-vpf"&gt;tuple&lt;/span&gt;: &lt;span class="pl-s3"&gt;tuple&lt;/span&gt;[&lt;span class="pl-c1"&gt;1&lt;/span&gt;], &lt;span class="pl-vpf"&gt;reverse&lt;/span&gt; &lt;span class="pl-k"&gt;=&lt;/span&gt; &lt;span class="pl-c1"&gt;True&lt;/span&gt;)
        &lt;span class="pl-c"&gt;# 如果要调试，可以打印出整个列表。&lt;/span&gt;
        &lt;span class="pl-k"&gt;return&lt;/span&gt; results[&lt;span class="pl-c1"&gt;0&lt;/span&gt;][&lt;span class="pl-c1"&gt;0&lt;/span&gt;]
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-10">
                10
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-11">
                11
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-12">
                12
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-13">
                13
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-14">
                14
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-15">
                15
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-16">
                16
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-17">
                17
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-18">
                18
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-19">
                19
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3647819564907-20">
                20
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3647819564907-21">
                21
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db5b3647819564907-1">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-st"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 def
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-en"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 classify
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-vpf"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-vpf"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 filename
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-2">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 results
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;=
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 {
                </span>
                <span class="crayon-sy">
                 }
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-3">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-v"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-4">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 results
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;=
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-c1"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-5">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 f
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;=
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 codecs
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 open
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 filename
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-s1"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-pds"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-s">
                 '&lt;/span&gt;r&lt;span class="pl-pds"&gt;'
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-s1"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-pds"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-s">
                 '&lt;/span&gt;iso8859-1&lt;span class="pl-pds"&gt;'
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-6">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 line
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 f
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-7">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 tokens
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;=
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 line
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 split
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-8">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 tokens
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-9">
                <span class="crayon-h">
                </span>
                <span class="crayon-c">
                 #print(token)
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-10">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 strip
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 '\'".,?:-'
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 lower
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-11">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 vocabulary
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-12">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-13">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 prob
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 ==
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-14">
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 print
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "%s %s"
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 %
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-15">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 results
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 math
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 log
                </span>
                <span class="crayon-sy">
                 (
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-16">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 prob
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 token
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-17">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 f
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 close
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-18">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 results
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 list
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 results
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 items
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-19">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 results
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 sort
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 key
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-v">
                 lambda
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 tuple
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 tuple
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 1
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 reverse
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-t">
                 True
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3647819564907-20">
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # 如果要调试，可以打印出整个列表。
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3647819564907-21">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 return
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 results
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 ]
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-sy">
                 ]
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0933 seconds] -->
       </div>
       <p>
        最后我们编写一个函数对测试集中的所有文档进行分类，并计算准确率：
       </p>
       <div class="highlight highlight-python">
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685db5b3651184496001" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
    def testCategory(self, directory, category):
        files = os.listdir(directory)
        total = 0
        correct = 0
        for file in files:
            total += 1
            result = self.classify(directory + file)
            if result == category:
                correct += 1
        return (correct, total)

    def test(self, testdir):
        """测试集的目录结构和训练集相同"""
        categories = os.listdir(testdir)
        # 过滤掉不是目录的元素
        categories = [filename for filename in categories if
                      os.path.isdir(testdir + filename)]
        correct = 0
        total = 0
        for category in categories:
            print(".", end="")
            (catCorrect, catTotal) = self.testCategory(
                testdir + category + '/', category)
            correct += catCorrect
            total += catTotal
        print("\n\nAccuracy is  %f%%  (%i test instances)" %
              ((float(correct) / total) * 100, total))
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-10">
                10
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-11">
                11
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-12">
                12
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-13">
                13
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-14">
                14
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-15">
                15
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-16">
                16
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-17">
                17
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-18">
                18
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-19">
                19
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-20">
                20
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-21">
                21
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-22">
                22
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-23">
                23
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-24">
                24
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-25">
                25
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-57685db5b3651184496001-26">
                26
               </div>
               <div class="crayon-num" data-line="crayon-57685db5b3651184496001-27">
                27
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-57685db5b3651184496001-1">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 def
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 testCategory
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 directory
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-2">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 files
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 os
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 listdir
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 directory
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-3">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-4">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 correct
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-5">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 file
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 files
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-6">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-7">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 result
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 classify
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 directory
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 file
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-8">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 result
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 ==
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-9">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 correct
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 1
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-10">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 return
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 correct
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-11">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-12">
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 def
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 test
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 testdir
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-13">
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 """测试集的目录结构和训练集相同"""
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-14">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 os
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 listdir
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 testdir
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-15">
                <span class="crayon-h">
                </span>
                <span class="crayon-p">
                 # 过滤掉不是目录的元素
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-16">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-v">
                 filename
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 filename
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-17">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 os
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 path
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 isdir
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 testdir
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 filename
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ]
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-18">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 correct
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-19">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-cn">
                 0
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-20">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 categories
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-21">
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 print
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "."
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 end
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 ""
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-22">
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 catCorrect
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 catTotal
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-r">
                 self
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 testCategory
                </span>
                <span class="crayon-sy">
                 (
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-23">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 testdir
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 '/'
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 category
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-24">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 correct
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 catCorrect
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-25">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 +=
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 catTotal
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-57685db5b3651184496001-26">
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 print
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-s">
                 "\n\nAccuracy is  %f%%  (%i test instances)"
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 %
                </span>
               </div>
               <div class="crayon-line" id="crayon-57685db5b3651184496001-27">
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-s3"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-t">
                 float
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 correct
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-k"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 *
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "pl-c1"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-cn">
                 100
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 ,
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 total
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.1200 seconds] -->
       </div>
       <p>
        在不使用停词列表的情况下，这个分类器的效果是：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-29.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-29.png"/>
        </a>
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-30.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-30.png"/>
        </a>
       </p>
       <blockquote>
        <p>
         准确率77.77%，看起来很不错。如果用了停词列表效果会如何呢？
        </p>
        <p>
         那让我们来测试一下吧！
        </p>
       </blockquote>
       <p>
        请自行到网络上查找一些停词列表，并填写以下表格：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-31.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-31.png"/>
        </a>
       </p>
       <p>
        我找到了两个停词列表，分别是包含
        <a href="http://nlp.stanford.edu/IR-book/html/htmledition/dropping-common-terms-stop-words-1.html">
         25个词
        </a>
        和
        <a href="http://www.ranks.nl/stopwords">
         174个词
        </a>
        的列表，结果如下：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-32.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-32.png"/>
        </a>
       </p>
       <p>
        看来第二个停词列表能提升2%的效果，你的结果如何？
       </p>
       <h2>
        <a class="anchor" href="https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md#%E6%9C%B4%E7%B4%A0%E8%B4%9D%E5%8F%B6%E6%96%AF%E4%B8%8E%E6%83%85%E6%84%9F%E5%88%86%E6%9E%90" id="user-content-朴素贝叶斯与情感分析">
        </a>
        朴素贝叶斯与情感分析
       </h2>
       <p>
        情感分析的目的是判断作者的态度或意见：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-33.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-331.png"/>
        </a>
       </p>
       <p>
        情感分析的例子之一是判断一篇评论是正面的还是反面的，我们可以用朴素贝叶斯算法来实现。我们可以用Pang&amp;Lee 2004的影评数据来测试，这份数据集包含1000个正面和1000个负面的评价，以下是一些示例：
       </p>
       <blockquote>
        <p>
         本月第二部连环杀人犯电影实在太糟糕了！虽然开头的故事情节和场景布置还可以，但后面就……
        </p>
        <p>
         当我听说罗密欧与朱丽叶又出了一部改编电影后，心想莎士比亚的经典又要被糟蹋了。不过我错了，Baz Luhrman导演的水平还是高的……
        </p>
       </blockquote>
       <p>
        你可以从
        <a href="http://www.cs.cornell.edu/People/pabo/movie-review-data/">
         http://www.cs.cornell.edu/People/pabo/movie-review-data/
        </a>
        上下载这个数据集，并整理成以下形式：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-34.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-34.png"/>
        </a>
       </p>
       <p>
        你也可以从
        <a href="http://guidetodatamining.com/guide/data/reviewPolarityBuckets.zip">
         这里
        </a>
        下载整理好的数据。
       </p>
       <p>
        <strong>
         动手实践
        </strong>
       </p>
       <p>
        你可以为上文的朴素贝叶斯分类器增加十折交叉验证的逻辑吗？它的输出结果应该是如下形式：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-35.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-35.png"/>
        </a>
       </p>
       <p>
        另外，请计算Kappa指标。
       </p>
       <p>
        <strong>
         再次声明：只看不练是不行的，就好比你不可能通过阅读乐谱就学会弹奏钢琴。
        </strong>
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-36.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-36.png"/>
        </a>
       </p>
       <p>
        <strong>
         解答
        </strong>
       </p>
       <p>
        这是我得到的结果：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-37.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-37.png"/>
        </a>
       </p>
       <p>
        Kappa指标则是：
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/img/chapter-7/chapter-7-38.png" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2015/01/chapter-7-38.png"/>
        </a>
       </p>
       <p>
        所以我们的分类算法效果是不错的。
       </p>
       <p>
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/code/chapter-7/bayesSentiment.py">
         代码链接
        </a>
       </p>
       <p>
        英文原文：
        <a href="http://guidetodatamining.com/chapter-7/">
         http://guidetodatamining.com/chapter-7/
        </a>
       </p>
       <p>
        文章出处：
        <a href="https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md">
         https://github.com/jizhang/guidetodatamining/blob/master/chapter-7.md
        </a>
       </p>
      </div>
      <div>
       <strong>
        注：转载文章均来自于公开网络，仅供学习使用，不会用于任何商业用途，如果侵犯到原作者的权益，请您与我们联系删除或者授权事宜，联系邮箱：contact@dataunion.org。转载数盟网站文章请注明原文章作者，否则产生的任何版权纠纷与数盟无关。
       </strong>
      </div>
      <!--content_text-->
      <div class="fenxian">
       <!-- JiaThis Button BEGIN -->
       <div class="jiathis_style_32x32">
        <p class="jiathis_button_weixin">
        </p>
        <p class="jiathis_button_tsina">
        </p>
        <p class="jiathis_button_qzone">
        </p>
        <p class="jiathis_button_cqq">
        </p>
        <p class="jiathis_button_tumblr">
        </p>
        <a class="jiathis jiathis_txt jtico jtico_jiathis" href="http://www.jiathis.com/share" target="_blank">
        </a>
        <p class="jiathis_counter_style">
        </p>
       </div>
       <!-- JiaThis Button END -->
      </div>
     </article>
     <!--content-->
     <!--相关文章-->
     <div class="xianguan">
      <div class="xianguantitle">
       相关文章！
      </div>
      <ul class="pic">
       <li>
        <a href="http://dataunion.org/20824.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/t018630756a7e263b33-300x165.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20824.html" rel="bookmark" title="如何判断一笔交易是否属于欺诈？你只是需要一点数据挖掘">
         如何判断一笔交易是否属于欺诈？你只是需要一点数据挖掘
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20679.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/t010e3a70fbcfef2c12-300x101.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20679.html" rel="bookmark" title="一篇文章带你认识“高大上”的图数据挖掘">
         一篇文章带你认识“高大上”的图数据挖掘
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20167.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/07/t0170189d0bb57172e9_副本.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20167.html" rel="bookmark" title="浅谈金融行业的数据挖掘之道">
         浅谈金融行业的数据挖掘之道
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/19877.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/07/t01d360f079b2c02f4d.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/19877.html" rel="bookmark" title="想入门机器学习、数据挖掘，我该怎么做？">
         想入门机器学习、数据挖掘，我该怎么做？
        </a>
       </li>
      </ul>
     </div>
     <!--相关文章-->
     <div class="comment" id="comments">
      <!-- You can start editing here. -->
      <!-- If comments are open, but there are no comments. -->
      <div class="title">
       期待你一针见血的评论，Come on！
      </div>
      <div id="respond">
       <p>
        不用想啦，马上
        <a href="http://dataunion.org/wp-login.php?redirect_to=http%3A%2F%2Fdataunion.org%2F8895.html">
         "登录"
        </a>
        发表自己的想法。
       </p>
      </div>
     </div>
     <!-- .nav-single -->
    </div>
    <!--Container End-->
    <aside id="sitebar">
     <div class="sitebar_list2">
      <div class="wptag">
       <span class="tagtitle">
        热门标签+
       </span>
       <div class="tagg">
        <ul class="menu" id="menu-%e5%8f%8b%e6%83%85%e9%93%be%e6%8e%a5">
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-1605" id="menu-item-1605">
          <a href="http://taidizh.com/">
           泰迪智慧
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20884" id="menu-item-20884">
          <a href="http://www.transwarp.cn/">
           星环科技
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-3538" id="menu-item-3538">
          <a href="http://datall.org/">
           珈和遥感
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20888" id="menu-item-20888">
          <a href="http://www.chinahadoop.cn/">
           小象学院
          </a>
         </li>
        </ul>
       </div>
      </div>
     </div>
     <div class="sitebar_list">
      <div class="textwidget">
       <div align="center">
        <a href="http://study.163.com/course/courseMain.htm?courseId=991022" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2016/03/dv.jpg"/>
        </a>
       </div>
      </div>
     </div>
     <div class="sitebar_list">
      <h4 class="sitebar_title">
       文章分类
      </h4>
      <div class="tagcloud">
       <a class="tag-link-44" href="http://dataunion.org/category/industry/demo" style="font-size: 10.204724409449pt;" title="4个话题">
        Demo展示
       </a>
       <a class="tag-link-31" href="http://dataunion.org/category/experts" style="font-size: 15.826771653543pt;" title="52个话题">
        专家团队
       </a>
       <a class="tag-link-870" href="http://dataunion.org/category/tech/ai" style="font-size: 19.795275590551pt;" title="273个话题">
        人工智能
       </a>
       <a class="tag-link-488" href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f" style="font-size: 8pt;" title="1个话题">
        加入数盟
       </a>
       <a class="tag-link-869" href="http://dataunion.org/category/tech/viz" style="font-size: 17.204724409449pt;" title="93个话题">
        可视化
       </a>
       <a class="tag-link-30" href="http://dataunion.org/category/partners" style="font-size: 10.645669291339pt;" title="5个话题">
        合作伙伴
       </a>
       <a class="tag-link-889" href="http://dataunion.org/category/parterc" style="font-size: 11.582677165354pt;" title="8个话题">
        合作会议
       </a>
       <a class="tag-link-104" href="http://dataunion.org/category/books" style="font-size: 12.96062992126pt;" title="15个话题">
        图书
       </a>
       <a class="tag-link-220" href="http://dataunion.org/category/tech/base" style="font-size: 19.850393700787pt;" title="281个话题">
        基础架构
       </a>
       <a class="tag-link-219" href="http://dataunion.org/category/tech/analysis" style="font-size: 19.409448818898pt;" title="232个话题">
        数据分析
       </a>
       <a class="tag-link-887" href="http://dataunion.org/category/tech/dm" style="font-size: 13.291338582677pt;" title="17个话题">
        数据挖掘
       </a>
       <a class="tag-link-34" href="http://dataunion.org/category/tech" style="font-size: 20.732283464567pt;" title="404个话题">
        文章
       </a>
       <a class="tag-link-1" href="http://dataunion.org/category/uncategorized" style="font-size: 22pt;" title="693个话题">
        未分类
       </a>
       <a class="tag-link-4" href="http://dataunion.org/category/events" style="font-size: 14.503937007874pt;" title="29个话题">
        活动
       </a>
       <a class="tag-link-890" href="http://dataunion.org/category/tech/%e6%b7%b1%e5%ba%a6%e5%ad%a6%e4%b9%a0" style="font-size: 10.204724409449pt;" title="4个话题">
        深度学习
       </a>
       <a class="tag-link-221" href="http://dataunion.org/category/tech/devl" style="font-size: 18.968503937008pt;" title="193个话题">
        编程语言
       </a>
       <a class="tag-link-888" href="http://dataunion.org/category/career" style="font-size: 15.661417322835pt;" title="48个话题">
        职业规划
       </a>
       <a class="tag-link-5" href="http://dataunion.org/category/jobs" style="font-size: 14.11811023622pt;" title="25个话题">
        职位
       </a>
       <a class="tag-link-871" href="http://dataunion.org/category/industry" style="font-size: 15.716535433071pt;" title="49个话题">
        行业
       </a>
       <a class="tag-link-613" href="http://dataunion.org/category/industry/case" style="font-size: 16.984251968504pt;" title="84个话题">
        行业应用
       </a>
       <a class="tag-link-885" href="http://dataunion.org/category/industry/news" style="font-size: 17.425196850394pt;" title="102个话题">
        行业资讯
       </a>
       <a class="tag-link-10" href="http://dataunion.org/category/training" style="font-size: 14.228346456693pt;" title="26个话题">
        课程
       </a>
       <a class="tag-link-16" href="http://dataunion.org/category/sources" style="font-size: 15.661417322835pt;" title="48个话题">
        资源
       </a>
      </div>
     </div>
     <div class="sitebar_list">
      <h4 class="sitebar_title">
       功能
      </h4>
      <ul>
       <li>
        <a href="http://dataunion.org/wp-login.php?action=register">
         注册
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/wp-login.php">
         登录
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/feed">
         文章
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/comments/feed">
         评论
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <li>
        <a href="https://cn.wordpress.org/" title="基于WordPress，一个优美、先进的个人信息发布平台。">
         WordPress.org
        </a>
       </li>
      </ul>
     </div>
    </aside>
    <div class="clear">
    </div>
   </div>
   <!--main-->
   <footer id="dibu">
    <div class="about">
     <div class="right">
      <ul class="menu" id="menu-%e5%ba%95%e9%83%a8%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-18024" id="menu-item-18024">
        <a href="http://dataunion.org/category/partners">
         合作伙伴
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20881" id="menu-item-20881">
        <a href="http://dataunion.org/contribute">
         文章投稿
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20872" id="menu-item-20872">
        <a href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f">
         加入数盟
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22441" id="menu-item-22441">
        <a href="http://dataunion.org/f-links">
         友情链接
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20874" id="menu-item-20874">
        <a href="http://dataunion.org/aboutus">
         关于数盟
        </a>
       </li>
      </ul>
      <p class="banquan">
       数盟社区，做最棒的数据科学社区
      </p>
     </div>
     <div class="left">
      <ul class="bottomlist">
       <li>
        <a href="http://weibo.com/DataScientistUnion" target="_blank" title="">
         <img src="http://dataunion.org/wp-content/themes/yzipi/images/weibo.png"/>
        </a>
       </li>
       <li>
        <a class="cd-popup-trigger" href="http://dataunion.org/8895.html#0">
         <img src="http://dataunion.org/wp-content/themes/yzipi/images/weixin.png"/>
        </a>
       </li>
      </ul>
      <div class="cd-popup">
       <div class="cd-popup-container">
        <h1>
         扫描二维码,加微信公众号
        </h1>
        <img src="http://dataunion.org/wp-content/themes/yzipi/images/2014-12-06-1515289049.png"/>
        <a class="cd-popup-close" href="http://dataunion.org/8895.html">
        </a>
       </div>
       <!-- cd-popup-container -->
      </div>
      <!-- cd-popup -->
     </div>
    </div>
    <!--about-->
    <div class="bottom">
     <a href="http://dataunion.org/">
      数盟社区
     </a>
     <a href="http://www.miitbeian.gov.cn/" rel="external nofollow" target="_blank">
      京ICP备14026740号
     </a>
     联系我们：
     <a href="mailto:contact@dataunion.org" target="_blank">
      contact@dataunion.org
     </a>
     <div class="tongji">
     </div>
     <!--bottom-->
     <div class="scroll" id="scroll" style="display:none;">
      ︿
     </div>
    </div>
   </footer>
   <!--dibu-->
  </div>
 </body>
</html>